Análisis exploratorio de los datos
data = read.csv('Corruption Perceptions Index - Dataset.csv', sep = ';')
data$african_union = as.logical(data$african_union)
data$arab_states = as.logical(data$arab_states)
data$BRICS = as.logical(data$BRICS)
data$EU = as.logical(data$EU)
data$G20 = as.logical(data$G20)
data$OECD = as.logical(data$OECD)
head(data)
## 'data.frame': 180 obs. of 42 variables:
## $ country : Factor w/ 180 levels "Afghanistan",..: 117 44 55 121 156 143 155 29 97 116 ...
## $ ISO3 : Factor w/ 180 levels "AFG","AGO","ALB",..: 124 45 54 122 28 142 153 27 99 121 ...
## $ region : Factor w/ 6 levels "AME","AP","ECA",..: 2 6 6 6 6 2 6 1 6 6 ...
## $ african_union : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ arab_states : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ BRICS : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ EU : logi FALSE TRUE TRUE FALSE FALSE FALSE ...
## $ G20 : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ OECD : logi TRUE TRUE TRUE TRUE TRUE FALSE ...
## $ CPI_score_2017 : int 89 88 85 85 85 84 84 82 82 82 ...
## $ standard_error_2017 : num 2.4 2.75 2.84 1.83 1.71 2.26 2.27 1.49 2.08 2.23 ...
## $ lower_CI_2017 : int 85 83 80 82 82 80 80 80 79 78 ...
## $ upper_CI_2017 : int 93 93 90 88 88 88 88 84 85 86 ...
## $ number_sources_2017 : int 8 8 8 8 7 9 8 8 6 8 ...
## $ CPI_score_2012 : int 90 90 90 85 86 87 88 84 80 84 ...
## $ standard_error_2012 : num 2.2 2 3 1.6 2.6 2.1 1.9 2.2 2.8 2 ...
## $ number_sources_2012 : int 7 7 7 7 6 9 7 7 6 7 ...
## $ CPI_score_2013 : int 91 91 89 86 85 86 89 81 80 83 ...
## $ standard_error_2013 : num 2.3 2.2 1.7 2.3 2.5 2.3 2.3 2.4 2.9 2 ...
## $ number_sources_2013 : int 7 7 7 7 6 9 7 7 6 7 ...
## $ CPI_score_2014 : int 91 92 89 86 86 84 87 81 82 83 ...
## $ standard_error_2014 : num 2.28 2.04 2.05 2.38 2.61 ...
## $ number_sources_2014 : int 7 7 7 7 6 8 7 7 6 7 ...
## $ CPI_score_2015 : int 91 91 90 88 86 85 89 83 85 84 ...
## $ standard_error_2015 : num 2.32 2.16 1.77 2.24 2.55 ...
## $ number_sources_2015 : int 7 7 7 7 6 8 7 7 5 7 ...
## $ CPI_score_2016 : int 90 90 89 85 86 84 88 82 81 83 ...
## $ standard_error_2016 : num 2.56 2.46 1.46 1.85 1.57 2.35 1.33 2.03 1.96 2.32 ...
## $ number_sources_2016 : int 7 7 7 7 6 8 7 7 6 7 ...
## $ world_bank_CPIA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ world_economic_forum_EOS : int 92 80 92 86 85 90 74 80 86 84 ...
## $ global_insight_country_risk_ratings : int 83 83 83 83 83 83 83 83 83 83 ...
## $ bertelsmann_foundation_transformation_index : int 0 0 0 0 0 73 0 0 0 0 ...
## $ african_development_bank_CPIA : int 0 0 0 0 0 0 0 0 0 0 ...
## $ IMD_world_competitiveness_yearbook : int 93 99 92 85 89 90 84 86 84 89 ...
## $ bertelsmann_foundation_sustainable_governance_index: int 97 97 88 79 88 0 88 79 79 70 ...
## $ world_justice_project_rule_of_law_index : int 82 88 86 86 0 85 86 80 0 80 ...
## $ PRS_international_country_risk_guide : int 93 93 93 93 85 79 93 85 85 85 ...
## $ varieties_of_democracy_project : int 77 77 75 77 77 77 77 77 0 77 ...
## $ economist_intelligence_unit_country_ratings : int 90 90 72 90 90 90 90 90 72 90 ...
## $ freedom_house_nations_in_transit_ratings : int 0 0 0 0 0 0 0 0 0 0 ...
## $ PERC_asia_risk_guide : int 0 0 0 0 0 92 0 0 0 0 ...
cols = c('country',
'CPI_score_2012',
'CPI_score_2013',
'CPI_score_2014',
'CPI_score_2015',
'CPI_score_2016',
'CPI_score_2017')
data_cpi = data[cols]
colnames(data_cpi) = c('country', '2012', '2013', '2014', '2015', '2016', '2017')
data_cpi_top_20 = head(data_cpi[sort(data_cpi$`2017`, decreasing = TRUE, index.return=TRUE)$ix, ], 20)
data_cpi_top_20_melt = melt(data_cpi_top_20, id.vars='country')
ggplot(data_cpi_top_20_melt, aes(x=variable, y=value, color=country)) +
geom_point() +
geom_line(aes(group=country)) +
xlab("Year") + ylab("CPI") +
scale_color_discrete(name="Countries",
breaks=data_cpi_top_20$country) +
ggtitle("20 countries with higher CPI in 2017") +
theme(plot.title = element_text(hjust = 0.5))

data_cpi_top_20 = head(data_cpi[sort(data_cpi$`2017`, decreasing = FALSE, index.return=TRUE)$ix, ], 20)
data_cpi_top_20_melt = melt(data_cpi_top_20, id.vars='country')
ggplot(data_cpi_top_20_melt, aes(x=variable, y=value, color=country)) +
geom_point() +
geom_line(aes(group=country)) +
xlab("Year") + ylab("CPI") +
scale_color_discrete(name="Countries",
breaks=data_cpi_top_20$country) +
ggtitle("20 countries with lower CPI in 2017") +
theme(plot.title = element_text(hjust = 0.5))

CPI_score_cols = c('CPI_score_2012',
'CPI_score_2013',
'CPI_score_2014',
'CPI_score_2015',
'CPI_score_2016',
'CPI_score_2017')
number_sources_cols = c('number_sources_2012',
'number_sources_2013',
'number_sources_2014',
'number_sources_2015',
'number_sources_2016',
'number_sources_2017')
cols = c(CPI_score_cols, number_sources_cols)
# Calculate mean por each region
ame = sapply(data[data$region == 'AME', cols], mean)
we_eu = sapply(data[data$region == 'WE/EU', cols], mean)
ap = sapply(data[data$region == 'AP', cols], mean)
eca = sapply(data[data$region == 'ECA', cols], mean)
mena = sapply(data[data$region == 'MENA', cols], mean)
ssa = sapply(data[data$region == 'SSA', cols], mean)
regions = rbind(ame, we_eu, ap, eca, mena, ssa)
rownames(regions) = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')
# Melt dataframes by region
cpi_score_df = regions[, CPI_score_cols]
colnames(cpi_score_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
cpi_score_df_melt = melt(cpi_score_df, id.vars=rownames)
number_sources_df = regions[, number_sources_cols]
colnames(number_sources_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
number_sources_df_melt = melt(number_sources_df, id.vars=rownames)
# Plot comparative between CPI and number of sources
g1 = ggplot(cpi_score_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) +
geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
xlab("Regions") + ylab("Mean CPI") +
ggtitle("Mean CPI by region and year") +
theme(plot.title = element_text(hjust = 0.5),
legend.title=element_blank())
g2 = ggplot(number_sources_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) +
geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
xlab("Regions") + ylab("Mean number of sources") +
ggtitle("Mean number of sources by region and year") +
theme(plot.title = element_text(hjust = 0.5),
legend.title=element_blank())
grid.arrange(g1, g2, nrow = 1)

# Calculate mean por each economic group
african_union = sapply(data[data$african_union == TRUE, cols], mean)
arab_states = sapply(data[data$arab_states == TRUE, cols], mean)
BRICS = sapply(data[data$BRICS == TRUE, cols], mean)
EU = sapply(data[data$EU == TRUE, cols], mean)
G20 = sapply(data[data$G20 == TRUE, cols], mean)
OECD = sapply(data[data$OECD == TRUE, cols], mean)
economic_groups = rbind(african_union, arab_states, BRICS, EU, G20, OECD)
rownames(economic_groups) = c('African Union', 'Arab States', 'BRICS', 'EU', 'G20', 'OECD')
# Melt dataframes by economic group
cpi_score_df = economic_groups[, CPI_score_cols]
colnames(cpi_score_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
cpi_score_df_melt = melt(cpi_score_df, id.vars=rownames)
number_sources_df = economic_groups[, number_sources_cols]
colnames(number_sources_df) = c('2012', '2013', '2014', '2015', '2016', '2017')
number_sources_df_melt = melt(number_sources_df, id.vars=rownames)
# Plot comparative between CPI and number of sources
g1 = ggplot(cpi_score_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) +
geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
xlab("Economic groups") + ylab("Mean CPI") +
ggtitle("Mean CPI by economic group and year") +
theme(plot.title = element_text(hjust = 0.5),
legend.title=element_blank())
g2 = ggplot(number_sources_df_melt, aes(x=Var1, y=value, fill=as.factor(Var2))) +
geom_bar(aes(group=Var2), stat = "identity", position = "dodge") +
xlab("Economic groups") + ylab("Mean number of sources") +
ggtitle("Mean number of sources by economic group and year") +
theme(plot.title = element_text(hjust = 0.5),
legend.title=element_blank())
grid.arrange(g1, g2, nrow = 1)

Mediana borrosa por región
fuzzy_quantile = function(fuzzy_number_list, deltas, p) {
# Calculate delta-cuts of all fuzzy numbers
alphacut_list = c()
for(d in 1:length(deltas)) {
alphacut = c()
for(i in 1:length(fuzzy_number_list)) {
alphacut = c(alphacut, alphacut(fuzzy_number_list[[i]], deltas[d]))
}
alphacut_list[[d]] = sort(alphacut)
}
freq_dist_L = matrix(rep(0, len = length(deltas) * length(fuzzy_number_list) * 2),
nrow = length(deltas))
freq_dist_U = matrix(rep(0, len = length(deltas) * length(fuzzy_number_list) * 2),
nrow = length(deltas))
# Iterate over delta-cuts
for (i in 1:(length(fuzzy_number_list) * 2)) {
# Iterate over the fuzzy numbers
for (fuzzy_num in 1:length(fuzzy_number_list)) {
alphacut = alphacut(fuzzy_number_list[[fuzzy_num]], deltas)
# Iterate over deltas
for (d in 1:length(deltas)) {
# Intersec
if(alphacut[d, 'L'] <= alphacut_list[[d]][i]) {
freq_dist_U[d, i] = freq_dist_U[d, i] + 1
}
# Subset
if(alphacut[d, 'U'] <= alphacut_list[[d]][i]) {
freq_dist_L[d, i] = freq_dist_L[d, i] + 1
}
}
}
}
freq_dist_L = freq_dist_L / length(fuzzy_number_list)
freq_dist_U = freq_dist_U / length(fuzzy_number_list)
m = matrix(rep(0, len = 2 * length(deltas)), nrow = length(deltas))
fuzzy_quantile = data.frame(m, row.names = deltas)
colnames(fuzzy_quantile) = c('L', 'U')
# Upper
for(r in 1:nrow(freq_dist_L)) {
for(c in 1:ncol(freq_dist_L)) {
if(freq_dist_L[r, c] >= p) {
fuzzy_quantile[r, 'U'] = alphacut_list[[r]][c]
break
}
}
}
# Lower
for(r in 1:nrow(freq_dist_U)) {
for(c in 1:ncol(freq_dist_U)) {
if(freq_dist_U[r, c] >= p) {
fuzzy_quantile[r, 'L'] = alphacut_list[[r]][c]
break
}
}
}
return(fuzzy_quantile)
}
CPI_score_cols = c('CPI_score_2017')
ame = median(data[data$region == 'AME', 'CPI_score_2017'])
we_eu = median(data[data$region == 'WE/EU', 'CPI_score_2017'])
ap = median(data[data$region == 'AP', 'CPI_score_2017'])
eca = median(data[data$region == 'ECA', 'CPI_score_2017'])
mena = median(data[data$region == 'MENA', 'CPI_score_2017'])
ssa = median(data[data$region == 'SSA', 'CPI_score_2017'])
crisp_median_regions = rbind(ame, we_eu, ap, eca, mena, ssa)
rownames(crisp_median_regions) = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')
crisp_median_regions
## [,1]
## AME 38.5
## WE/EU 63.0
## AP 38.0
## ECA 35.0
## MENA 37.5
## SSA 31.0
# Get index from the original data for each region
ame = as.integer(rownames(data[data$region == 'AME', ]))
we_eu = as.integer(rownames(data[data$region == 'WE/EU', ]))
ap = as.integer(rownames(data[data$region == 'AP', ]))
eca = as.integer(rownames(data[data$region == 'ECA', ]))
mena = as.integer(rownames(data[data$region == 'MENA', ]))
ssa = as.integer(rownames(data[data$region == 'SSA', ]))
# Calculate median for each region
deltas = c(0, 0.25, 0.5, 0.75, 1)
p = 0.5
median_fuzzy_CPI_ame = fuzzy_quantile(fuzzy_CPI[ame], deltas, p)
median_fuzzy_CPI_we_eu = fuzzy_quantile(fuzzy_CPI[we_eu], deltas, p)
median_fuzzy_CPI_ap = fuzzy_quantile(fuzzy_CPI[ap], deltas, p)
median_fuzzy_CPI_eca = fuzzy_quantile(fuzzy_CPI[eca], deltas, p)
median_fuzzy_CPI_mena = fuzzy_quantile(fuzzy_CPI[mena], deltas, p)
median_fuzzy_CPI_ssa = fuzzy_quantile(fuzzy_CPI[ssa], deltas, p)
median_fuzzy_regions = list(median_fuzzy_CPI_ame, median_fuzzy_CPI_we_eu, median_fuzzy_CPI_ap,
median_fuzzy_CPI_eca, median_fuzzy_CPI_mena, median_fuzzy_CPI_ssa)
regions = c('AME', 'WE/EU', 'AP', 'ECA', 'MENA', 'SSA')
crisp_unem_women_weight = c()
legend_text = c()
colors = rainbow(length(median_fuzzy_regions))
# Plot median of the fuzzy numbers with the respective crisp number
for (i in 1:length(median_fuzzy_regions)) {
add = TRUE
if (i == 1) {
add = FALSE
}
fuzzy = median_fuzzy_regions[[i]]
fuzzy_median = PiecewiseLinearFuzzyNumber(fuzzy[1, "L"],
fuzzy[length(deltas), "L"],
fuzzy[length(deltas), "U"],
fuzzy[1, "U"],
knot.n = length(deltas),
knot.alpha = sort(deltas),
knot.left = sort(fuzzy[[1]]),
knot.right = sort(fuzzy[[2]]))
plot(fuzzy_median, add = add, col = colors[i], xlim = c(10, 90))
points(crisp_median_regions[i, ], 1, col = colors[i], pch = 8)
legend_text = c(legend_text, regions[i])
}
legend('topright', legend=legend_text, lty=1, col=colors)

Map comparativo del CPI crisp con el fuzzificado
fuzzy_CPI_score_2017 = sapply(fuzzy_CPI, expectedValue)
data['fuzzy_CPI_score_2017'] = round(fuzzy_CPI_score_2017)
data[c('country', 'CPI_score_2017', 'fuzzy_CPI_score_2017')]
par(mfrow = c(2, 1), mai = c(0.1, 0.2, 0.2, 0.2))
n = joinCountryData2Map(data, joinCode="ISO3", nameJoinColumn="ISO3")
# Crisp CPI map
map_params = mapCountryData(n,
nameColumnToPlot="CPI_score_2017",
mapTitle="Crisp Corruption Perception Index 2017",
missingCountryCol="snow2",
colourPalette=c('darkred', 'red', 'yellow'),
borderCol='white',
addLegend=FALSE,
numCats=10,
catMethod=seq(from = 0, to = 100, by = 5))
# Fuzzy CPI map
map_params = mapCountryData(n,
nameColumnToPlot="fuzzy_CPI_score_2017",
mapTitle="Fuzzy Corruption Perception Index 2017",
missingCountryCol="snow2",
colourPalette=c('darkred', 'red', 'yellow'),
borderCol='white',
addLegend=FALSE,
numCats=10,
catMethod=seq(from = 0, to = 100, by = 5))
# Create legend
do.call(addMapLegend,
c(map_params,
legendLabels="all",
legendWidth=0.5,
legendIntervals="data",
legendMar=5))
